#Nature's Kidneys: the Role of Wetland Reserve Easements in Restoring Water Quality
#Nicole Karwowski and Marin Skidmore
#Replication File
#2025

#Merge ECHO Nitrogen and Phosphorous
#Count facilities in each huc12 year


#load packages
#load libraries
library(ggplot2)
library(exactextractr)
library(data.table)
library(tidyverse)
library(Hmisc)
library(haven)
library(stargazer)
library(purrr)
library(tidyr)
library(collapse)
library(ggthemes)
library(mapview)
options(scipen=999)
library(stringr)
library(data.table)

setwd("C:/Users/16308/OneDrive - Montana State University/UW_Madison/Water quality/Replication Package")


#nitrogen first
n2008 <- read.csv(file="Data/ECHO/Raw/Nitrogen/nitrogen2008.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
n2009 <- read.csv(file="Data/ECHO/Raw/Nitrogen/nitrogen2009.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
n2010 <- read.csv(file="Data/ECHO/Raw/Nitrogen/nitrogen2010.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
n2011 <- read.csv(file="Data/ECHO/Raw/Nitrogen/nitrogen2011.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
n2012 <- read.csv(file="Data/ECHO/Raw/Nitrogen/nitrogen2012.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
n2013 <- read.csv(file="Data/ECHO/Raw/Nitrogen/nitrogen2013.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
n2014 <- read.csv(file="Data/ECHO/Raw/Nitrogen/nitrogen2014.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
n2015 <- read.csv(file="Data/ECHO/Raw/Nitrogen/nitrogen2015.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
n2016 <- read.csv(file="Data/ECHO/Raw/Nitrogen/nitrogen2016.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
n2017 <- read.csv(file="Data/ECHO/Raw/Nitrogen/nitrogen2017.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
n2018 <- read.csv(file="Data/ECHO/Raw/Nitrogen/nitrogen2018.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))

#bind rows
names(n2008)
n2008<-n2008[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
n2009<-n2009[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
n2010<-n2010[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
n2011<-n2011[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
n2012<-n2012[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
n2013<-n2013[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
n2014<-n2014[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
n2015<-n2015[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
n2016<-n2016[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
n2017<-n2017[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
n2018<-n2018[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]

n<-dplyr::bind_rows(n2008, n2009, n2010, n2011, n2012, n2013,
                    n2014, n2015, n2016, n2017, n2018)
rm(n2008)
rm(n2009)
rm(n2010)
rm(n2011)
rm(n2012)
rm(n2013)
rm(n2014)
rm(n2015)
rm(n2016)
rm(n2017)
rm(n2018)

#explore
summary(n)
colnames(n)<-c("Year", "NpedesPermitNumber", "FacilityName",
               "FrsId", "TriFacilityId", "CwnsId", "huc12",
               "ParameterCode", "Parameter", "PollutantLoadKgYr", 
               "WastewaterFlowMgalYr", "AvgDailyLoadKgDay", "AvgConcMgL",
               "AvgDailyFlowMgDay")

#what nutrient being emitted?
table(n$Parameter)

#any missing huc12s?
table(is.na(n$huc12))

#what is the id?
#how many unique facilities
length(unique(n$NpedesPermitNumber))
length(unique(n$FacilityName))
length(unique(n$FrsId))

#are there missing ones?
table(is.na(n$FacilityName))
table(is.na(n$NpedesPermitNumber))
table(is.na(n$FrsId))

#not going to be Tri or Cwns. 
length(unique(n$TriFacilityId))
length(unique(n$CwnsId))

#A nine-character code used to uniquely identify a permitted NPDES facility (NPDES ID). 
#The NPDES permit program regulates the direct discharge of pollutants into US waters. 
#A NPDES ID is provided for every facility.

#Facility Name is the primary name used to identify a facility.

#FRS ID is the 12-character code used to uniquely identify a facility site within 
#the EPA Facility Registry System (FRS) database. The code is also known as the UIN 
#(Unique Identification Number). 
#A FRS ID is assigned to every facility in ICIS-NPDES.


#use the NpedesPermitNumber when aggregating!

###############################################################################
#Aggregate to panel
n1<-n[,c(1,2,7)]

length(unique(n$NpedesPermitNumber))

n1<-unique(n)
length(unique(n1$NpedesPermitNumber))

#count number of permitted facilities by huc year
panel<- n1 %>% group_by(huc12, Year) %>% summarise(NitrogenFacilities=n())
summary(panel)

big<-subset(panel, panel$NitrogenFacilities>100)
list(big)

#save the nitrogen panel
npermits<-panel
save(npermits, file="Data/ECHO/Processed/nitrogen_point_huc12_year_panel.RData")


#################################################################################
#do phosphorous next

p2008 <- read.csv(file="Data/ECHO/Raw/Phosphorous/phosphorous2008.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
p2009 <- read.csv(file="Data/ECHO/Raw/Phosphorous/phosphorous2009.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
p2010 <- read.csv(file="Data/ECHO/Raw/Phosphorous/phosphorous2010.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
p2011 <- read.csv(file="Data/ECHO/Raw/Phosphorous/phosphorous2011.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
p2012 <- read.csv(file="Data/ECHO/Raw/Phosphorous/phosphorous2012.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
p2013 <- read.csv(file="Data/ECHO/Raw/Phosphorous/phosphorous2013.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
p2014 <- read.csv(file="Data/ECHO/Raw/Phosphorous/phosphorous2014.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
p2015 <- read.csv(file="Data/ECHO/Raw/Phosphorous/phosphorous2015.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
p2016 <- read.csv(file="Data/ECHO/Raw/Phosphorous/phosphorous2016.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
p2017 <- read.csv(file="Data/ECHO/Raw/Phosphorous/phosphorous2017.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))
p2018 <- read.csv(file="Data/ECHO/Raw/Phosphorous/phosphorous2018.csv", skip = 3, header = T, colClasses=c("HUC.12.Code"="character"))

#bind rows
names(p2008)
p2008<-p2008[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
p2009<-p2009[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
p2010<-p2010[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
p2011<-p2011[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
p2012<-p2012[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
p2013<-p2013[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
p2014<-p2014[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
p2015<-p2015[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
p2016<-p2016[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
p2017<-p2017[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]
p2018<-p2018[,c(1:6, 23, 36,37, 46, 52, 53, 54,55)]

p<-dplyr::bind_rows(p2008, p2009, p2010, p2011, p2012, p2013,
                    p2014, p2015, p2016, p2017, p2018)
rm(p2008)
rm(p2009)
rm(p2010)
rm(p2011)
rm(p2012)
rm(p2013)
rm(p2014)
rm(p2015)
rm(p2016)
rm(p2017)
rm(p2018)

#explore
colnames(p)<-c("Year", "NpedesPermitNumber", "FacilityName",
               "FrsId", "TriFacilityId", "CwnsId", "huc12",
               "ParameterCode", "Parameter", "PollutantLoadKgYr", 
               "WastewaterFlowMgalYr", "AvgDailyLoadKgDay", "AvgConcMgL",
               "AvgDailyFlowMgDay")
summary(p)

#what nutrients are being emitted?
table(p$Parameter)

table(p$Year)

#unique obs
length(unique(p$NpedesPermitNumber))
length(unique(p$FacilityName))

#any missing huc12s?
table(is.na(p$huc12))

p1<-p[,c(1,2,7)]

length(unique(p$NpedesPermitNumber))

p1<-unique(p)
length(unique(p1$NpedesPermitNumber))

#count number of permitted facilities by huc year
panel2<- p1 %>% group_by(huc12, Year) %>% summarise(PhosphorousFacilities=n())
summary(panel2)

big<-subset(panel2, panel2$PhosphorousFacilities>50)
list(big)

#save the nitrogen panel
ppermits<-panel2
save(ppermits, file="Data/ECHO/Processed/phosphorous_point_huc12_year_panel.RData")
